
R version 3.2.4 (2016-03-10) -- "Very Secure Dishes"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-apple-darwin13.4.0 (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

[R.app GUI 1.67 (7152) x86_64-apple-darwin13.4.0]

[History restored from /Users/gholliba/.Rapp.history]

> ### Jonathan D. Klingler, Gary E. Hollibaugh, Jr., and Adam J. Ramey
> ### "Don't Know What You Got: A Bayesian Hierarchical Model of Neuroticism and Ideological Uncertainty"
> ### Political Science Research and Methods
> ###
> ###
> ### Preprocessing and Summary Statistics (Tables B-1 through B-3)
> ### Description: This file takes the raw .sav file and converts it to a .Rda file for later processing
> ###
> ### Note 1: Run this file first before the others
> ### Note 2: Make sure to change the directory to the one containing this file
> 
> 
> rm(list=ls())
> library(memisc)
Loading required package: lattice
Loading required package: MASS

Attaching package: ‘memisc’

The following objects are masked from ‘package:stats’:

    contr.sum, contr.treatment, contrasts

The following objects are masked from ‘package:base’:

    as.array, trimws

> library(xtable)
> library(plyr)

Attaching package: ‘plyr’

The following object is masked from ‘package:memisc’:

    rename

> library(rio)
> library(foreign)
> 
> # Read the raw SPSS file as a data frame. Value labels are applied
> # (use.value.labels = TRUE), user-defined missing codes are kept as ordinary
> # values (use.missings = FALSE), and warnings from read.spss() are suppressed.
> cces_big <- suppressWarnings(
+   read.spss("CCES14_NYU_OUTPUT_Feb2015.sav",
+             use.value.labels = TRUE,
+             to.data.frame = TRUE,
+             use.missings = FALSE)
+ )
> 
> # Background variables summarised in the appendix B tables.
> summary_employ <- cces_big$employ
> summary_gender <- cces_big$gender
> summary_race <- cces_big$race
> summary_educ <- cces_big$educ
> summary_income <- cces_big$faminc
> summary_birthyr <- cces_big$birthyr
> summary_age <- 2014 - summary_birthyr
> 
> # News interest: "Skipped" and "Not Asked" are treated as missing.
> summary_news <- cces_big$newsint
> is.na(summary_news) <- summary_news %in% c("Skipped", "Not Asked")
> 
> # TIPI items (NYA333-NYA342). Each item is converted with as.numeric()
> # (presumably yielding factor level codes -- confirm); values below 8 are
> # kept and everything else becomes NA.
> valid_tipi_codes <- function(item) {
+   codes <- as.numeric(item)
+   ifelse(codes < 8, codes, NA)
+ }
> summary_tipi_extraverted <- valid_tipi_codes(cces_big$NYA333)
> summary_tipi_critical <- valid_tipi_codes(cces_big$NYA334)
> summary_tipi_dependable <- valid_tipi_codes(cces_big$NYA335)
> summary_tipi_anxious <- valid_tipi_codes(cces_big$NYA336)
> summary_tipi_open <- valid_tipi_codes(cces_big$NYA337)
> summary_tipi_reserved <- valid_tipi_codes(cces_big$NYA338)
> summary_tipi_sympathetic <- valid_tipi_codes(cces_big$NYA339)
> summary_tipi_disorganized <- valid_tipi_codes(cces_big$NYA340)
> summary_tipi_calm <- valid_tipi_codes(cces_big$NYA341)
> summary_tipi_conventional <- valid_tipi_codes(cces_big$NYA342)
> 
> # Each trait adds one item to the reverse of another (8 - item);
> # punif(., 2, 14) maps that two-item sum from [2, 14] onto [0, 1].
> tipi_trait_score <- function(keyed, reversed) {
+   punif(keyed + (8 - reversed), 2, 14)
+ }
> summary_extra <- tipi_trait_score(summary_tipi_extraverted, summary_tipi_reserved)
> summary_agree <- tipi_trait_score(summary_tipi_sympathetic, summary_tipi_critical)
> summary_consc <- tipi_trait_score(summary_tipi_dependable, summary_tipi_disorganized)
> # Neuroticism is one minus the calm/anxious composite.
> summary_neuro <- 1 - tipi_trait_score(summary_tipi_calm, summary_tipi_anxious)
> summary_openn <- tipi_trait_score(summary_tipi_open, summary_tipi_conventional)
> 
> # Recode income to make it more consistent: the survey used several cutoff
> # schemes whose top brackets overlap. Every bracket listed below is replaced
> # by the 17th level of the factor, after which unused levels are dropped.
> # NOTE(review): "$250,000 or more " ends in a space; presumably this matches
> # the value label in the .sav file -- confirm.
> top_income_brackets <- c("$150,000 - $199,999",
+                          "$200,000 - $249,999",
+                          "$250,000 - $349,999",
+                          "$350,000 - $499,999",
+                          "$500,000 or more",
+                          "$250,000 or more ")
> summary_income[summary_income %in% top_income_brackets] <- levels(summary_income)[17]
> summary_income <- factor(summary_income)
> 
> 
> # table b-1
> # Response counts for each placement item (CC334*), one row per placed target.
> placement_items <- list("Self" = cces_big$CC334A,
+                         "Obama" = cces_big$CC334C,
+                         "Clinton" = cces_big$CC334D,
+                         "Cruz" = cces_big$CC334E,
+                         "Paul" = cces_big$CC334F,
+                         "Bush" = cces_big$CC334G,
+                         "Democratic Party" = cces_big$CC334K,
+                         "Republican Party" = cces_big$CC334L,
+                         "Tea Party" = cces_big$CC334M,
+                         "Supreme Court" = cces_big$CC334W)
> placement_counts <- do.call(rbind, unname(lapply(placement_items, table)))
> placements <- data.frame(placement_counts, row.names = names(placement_items))
> 
> # Combine "Not sure" and "Skipped" into a single column and drop the originals.
> placements$`Don't Know/Skipped` <- placements$Not.sure + placements$Skipped
> placements <- subset(placements, select = -c(Not.sure, Skipped))
> # Swap the last two columns (presumably so the combined column precedes
> # Not.Asked -- confirm against the level order of the items).
> placements <- placements[, c(1:7, 9, 8)]
> # Row totals over every column except Not.Asked. apply(., 1, sum) is used
> # rather than rowSums() so that integer counts stay integer.
> asked_only <- subset(placements, select = -c(Not.Asked))
> placements$`Total Asked` <- apply(asked_only, 1, sum)
> print(xtable(placements, digits = 3), type = "html", file = "table-b1.html")
> 
> 
> # Most frequent value of x. Ties go to the value that occurs first in x;
> # NA is counted like any other value.
> Mode <- function(x) {
+   distinct <- unique(x)
+   counts <- tabulate(match(x, distinct))
+   distinct[which.max(counts)]
+ }
> 
> 
> # Summary statistics for a numeric vector, ignoring missing values.
> #
> # x: numeric vector; may contain NA.
> # Returns a one-row data frame with Mean, Median, Mode, Minimum, Maximum, SD
> # and Placements (the number of non-missing values), rounded to 3 decimals.
> summary_stats <- function(x){
+   # Use the observed values for every statistic. Previously the mode was the
+   # only statistic computed with NAs included, so a variable whose most
+   # frequent entry was NA reported a mode of NA.
+   observed <- x[!is.na(x)]
+   # Mode() on an empty vector returns a zero-length result, which cannot be
+   # placed in a one-row data frame; report NA when nothing is observed.
+   foo_mode <- if (length(observed) > 0) Mode(observed) else NA_real_
+   foo_out <- data.frame(Mean = mean(observed),
+                         Median = median(observed),
+                         Mode = foo_mode,
+                         Minimum = min(observed),
+                         Maximum = max(observed),
+                         SD = sd(observed),
+                         Placements = length(observed))
+   round(foo_out, digits = 3)
+ }
> 	
> 
> 
> # table b-2
> # Summary statistics for the five personality trait scores, one row per trait.
> tipi_scores <- data.frame(Openness = as.numeric(summary_openn),
+                           Conscientiousness = as.numeric(summary_consc),
+                           Extraversion = as.numeric(summary_extra),
+                           Agreeableness = as.numeric(summary_agree),
+                           Neuroticism = as.numeric(summary_neuro))
> # One summary_stats() data frame per column, stacked by ldply(); the first
> # column of the result holds the trait names.
> tipi.table <- ldply(lapply(tipi_scores, summary_stats), data.frame)
> names(tipi.table)[1] <- "Personality Trait"
> print(xtable(tipi.table, digits = 3), type = "html", file = "table-b2.html")
> 
> # table b-3
> # Summary statistics for the demographic and background variables.
> # check.names = FALSE keeps the quoted labels as written; with the default
> # (TRUE) data.frame() rewrites "Other Race" to "Other.Race" and so on, and
> # those mangled names end up in the Variable column of the table.
> # NOTE(review): Education and Income are as.numeric() of factors, i.e. level
> # codes. Any non-substantive level still present in summary_income (faminc has
> # a "Prefer not to say" value, see "Income Refused") is averaged in as if it
> # were an income bracket -- confirm this is intended.
> # NOTE(review): %in% never returns NA, so respondents with a missing race are
> # counted under "Other Race" -- confirm this is intended.
> other_vars <- data.frame(Female = as.numeric(summary_gender == "Female"),
+                          Age = as.numeric(summary_age),
+                          Black = as.numeric(summary_race == "Black"),
+                          Hispanic = as.numeric(summary_race == "Hispanic"),
+                          "Other Race" = as.numeric(!(summary_race %in% c("White", "Black", "Hispanic"))),
+                          Education = as.numeric(summary_educ),
+                          "High News Interest" = as.numeric(summary_news == "Most of the time"),
+                          "Unknown News Interest" = as.numeric(summary_news == "Don't know"),
+                          Income = as.numeric(summary_income),
+                          "Income Refused" = as.numeric(cces_big$faminc == "Prefer not to say"),
+                          "Employed Full-Time" = as.numeric(summary_employ == "Full-time"),
+                          "Employed Part-Time" = as.numeric(summary_employ == "Part-time"),
+                          Unemployed = as.numeric(summary_employ == "Unemployed"),
+                          Retired = as.numeric(summary_employ == "Retired"),
+                          check.names = FALSE)
> # One summary_stats() data frame per column, stacked by ldply(); the first
> # column of the result holds the variable labels.
> other.table <- ldply(lapply(other_vars, summary_stats), data.frame)
> colnames(other.table)[1] <- "Variable"
> print(xtable(other.table, digits = 3), type = "html", file = "table-b3.html")
>       
>       
> # Map the [0, 1] trait scores back onto the seven-point scale to be consistent
> # with the other code. Rounding to 10 digits is due to machine precision
> # issues.
> to_seven_point <- function(score) {
+   round(as.numeric(1 + score * 6), digits = 10)
+ }
> cces_big$self_extra <- to_seven_point(summary_extra)
> cces_big$self_agree <- to_seven_point(summary_agree)
> cces_big$self_consc <- to_seven_point(summary_consc)
> # self_emoti is built from the reversed neuroticism score (1 - summary_neuro).
> cces_big$self_emoti <- to_seven_point(1 - summary_neuro)
> cces_big$self_openn <- to_seven_point(summary_openn)
> 
> # Keep only rows where CC421a and all five trait scores are non-missing.
> incomplete <- is.na(cces_big$CC421a) |
+   is.na(cces_big$self_extra) |
+   is.na(cces_big$self_emoti) |
+   is.na(cces_big$self_openn) |
+   is.na(cces_big$self_agree) |
+   is.na(cces_big$self_consc)
> cces <- cces_big[!incomplete, ]
> 
> # Write the filtered data for the later scripts.
> save(cces, file = "CCES2014.Rda")
